\relax 
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{sonatype}
\citation{he2020software}
\citation{ji2022opensource}
\citation{martinez2021software}
\citation{apache2022log4j}
\citation{sonatype}
\citation{zimmermann2019small}
\@writefile{toc}{\contentsline {section}{\numberline {1}引言}{1}{section.1}\protected@file@percent }
\newlabel{sec:introduction}{{1}{1}{引言}{section.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}背景与动机}{1}{subsection.1.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}挑战与研究空白}{1}{subsection.1.2}\protected@file@percent }
\newlabel{sec:challenges}{{1.2}{1}{挑战与研究空白}{subsection.1.2}{}}
\citation{su2024learning}
\citation{halder2024malicious}
\citation{samaana2025machine}
\citation{ladisa2023feasibility}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}本文方法与贡献}{2}{subsection.1.3}\protected@file@percent }
\newlabel{sec:contributions}{{1.3}{2}{本文方法与贡献}{subsection.1.3}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.4}研究问题 (RQs)}{2}{subsection.1.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.5}论文组织结构}{2}{subsection.1.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}相关工作}{2}{section.2}\protected@file@percent }
\newlabel{sec:related_work}{{2}{2}{相关工作}{section.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}传统机器学习与元数据分析}{2}{subsection.2.1}\protected@file@percent }
\citation{sun20241+}
\citation{sejfia2022practical}
\citation{huang2024donapi}
\citation{zhang2023malicious}
\citation{yu2024maltracker}
\citation{huang2024spiderscan}
\citation{liang2023needle}
\citation{ohm2022towards}
\citation{zheng2024towards}
\citation{iqbal2025pypiguard}
\citation{sun20241+}
\citation{zahan2024leveraging}
\citation{zeshan2024they}
\citation{zeshan2024they,xue2024poster}
\citation{wang2025malpacdetector}
\citation{zhang2025automatically}
\citation{huang2024spiderscan}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}基于行为序列与图的深度分析}{3}{subsection.2.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}无监督与混合分析方法}{3}{subsection.2.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}大语言模型（LLM）在检测中的应用}{3}{subsection.2.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}协同参数化数据流调用图与大语言模型的NPM恶意包检测方法}{3}{section.3}\protected@file@percent }
\newlabel{sec:approach}{{3}{3}{协同参数化数据流调用图与大语言模型的NPM恶意包检测方法}{section.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces 我们提出的协同检测框架概览，该框架包含五个阶段：(1) 代码预处理与行为虚拟化；(2) PDCG构建；(3) 基于LLM的规则生成；(4) 协同分析与特征工程；(5) 恶意性判定。}}{4}{figure.caption.1}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{fig:framework}{{1}{4}{我们提出的协同检测框架概览，该框架包含五个阶段：(1) 代码预处理与行为虚拟化；(2) PDCG构建；(3) 基于LLM的规则生成；(4) 协同分析与特征工程；(5) 恶意性判定。}{figure.caption.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}阶段一：代码预处理与行为虚拟化}{4}{subsection.3.1}\protected@file@percent }
\newlabel{sec:preprocessing}{{3.1}{4}{阶段一：代码预处理与行为虚拟化}{subsection.3.1}{}}
\newlabel{lst:json_example}{{1}{4}{package.json中的恶意脚本}{lstlisting.1}{}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {1}{\ignorespaces package.json中的恶意脚本}}{4}{lstlisting.1}\protected@file@percent }
\newlabel{lst:js_virtual}{{2}{4}{虚拟化后的JavaScript代码}{lstlisting.2}{}}
\@writefile{lol}{\contentsline {lstlisting}{\numberline {2}{\ignorespaces 虚拟化后的JavaScript代码}}{4}{lstlisting.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}阶段二：参数化数据流调用图 (PDCG) 构建}{4}{subsection.3.2}\protected@file@percent }
\newlabel{sec:pdcg_construction}{{3.2}{4}{阶段二：参数化数据流调用图 (PDCG) 构建}{subsection.3.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.1}PDCG核心定义与设计思想}{4}{subsubsection.3.2.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces PDCG的设计思想：通过对AST进行修剪和简化，构建一个专注于核心行为的图表示。}}{5}{figure.caption.2}\protected@file@percent }
\newlabel{fig:pdcg_concept}{{2}{5}{PDCG的设计思想：通过对AST进行修剪和简化，构建一个专注于核心行为的图表示。}{figure.caption.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.2}PDCG节点与边的设计}{5}{subsubsection.3.2.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces PDCG的节点与边类型定义}}{5}{table.caption.3}\protected@file@percent }
\newlabel{tab:pdcg_elements}{{1}{5}{PDCG的节点与边类型定义}{table.caption.3}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.3}PDCG构建算法}{5}{subsubsection.3.2.3}\protected@file@percent }
\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces PDCG构建算法}}{5}{algorithm.1}\protected@file@percent }
\newlabel{alg:pdcg_construction_detailed}{{1}{5}{PDCG构建算法}{algorithm.1}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.4}关键设计与示例}{6}{subsubsection.3.2.4}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{链式调用建模}{6}{section*.4}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces 链式调用 \texttt  {a.b().c()} 对应的PDCG结构}}{6}{figure.caption.5}\protected@file@percent }
\newlabel{fig:pdcg_chain_call}{{3}{6}{链式调用 \texttt {a.b().c()} 对应的PDCG结构}{figure.caption.5}{}}
\@writefile{toc}{\contentsline {paragraph}{参数内容完整提取}{6}{section*.6}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces 对包含多种类型内容的复杂参数进行提取的PDCG示例}}{6}{figure.caption.7}\protected@file@percent }
\newlabel{fig:pdcg_argument}{{4}{6}{对包含多种类型内容的复杂参数进行提取的PDCG示例}{figure.caption.7}{}}
\@writefile{toc}{\contentsline {paragraph}{匿名回调函数识别}{6}{section*.8}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces 将匿名函数作为回调参数传递时的PDCG建模}}{6}{figure.caption.9}\protected@file@percent }
\newlabel{fig:pdcg_callback}{{5}{6}{将匿名函数作为回调参数传递时的PDCG建模}{figure.caption.9}{}}
\@writefile{toc}{\contentsline {paragraph}{自定义函数解析关联}{6}{section*.10}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces 将调用点解析到包内函数定义的PDCG示例}}{6}{figure.caption.11}\protected@file@percent }
\newlabel{fig:pdcg_resolution}{{6}{6}{将调用点解析到包内函数定义的PDCG示例}{figure.caption.11}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}阶段三：基于LLM的恶意行为规则生成}{6}{subsection.3.3}\protected@file@percent }
\newlabel{sec:rule_generation}{{3.3}{6}{阶段三：基于LLM的恶意行为规则生成}{subsection.3.3}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}LLM选型与输入定义}{6}{subsubsection.3.3.1}\protected@file@percent }
\citation{freeman1978centrality}
\citation{brandes2001faster}
\citation{page1999pagerank}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.2}迭代式的“扩充-融合”流程}{7}{subsubsection.3.3.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.3}“融合”过程示例}{7}{subsubsection.3.3.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}阶段四：协同分析与特征工程}{7}{subsection.3.4}\protected@file@percent }
\newlabel{sec:feature_engineering}{{3.4}{7}{阶段四：协同分析与特征工程}{subsection.3.4}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.1}语义特征：基于规则匹配的二进制向量}{7}{subsubsection.3.4.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.2}结构特征：基于图论的恶意行为画像}{7}{subsubsection.3.4.2}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{基础统计特征}{7}{section*.12}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{中心性特征}{7}{section*.13}\protected@file@percent }
\citation{blondel2008fast}
\citation{duan2020towards}
\citation{zheng2024towards}
\citation{datadog2023dataset}
\citation{ladisa2023feasibility}
\citation{sejfia2022practical}
\citation{ossgadget}
\@writefile{toc}{\contentsline {paragraph}{PageRank特征}{8}{section*.14}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{社区结构特征}{8}{section*.15}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.5}阶段五：基于机器学习的恶意性判定}{8}{subsection.3.5}\protected@file@percent }
\newlabel{sec:classification}{{3.5}{8}{阶段五：基于机器学习的恶意性判定}{subsection.3.5}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}实验设计}{8}{section.4}\protected@file@percent }
\newlabel{sec:experiments}{{4}{8}{实验设计}{section.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}研究问题重述}{8}{subsection.4.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}数据集构建}{8}{subsection.4.2}\protected@file@percent }
\newlabel{sec:dataset}{{4.2}{8}{数据集构建}{subsection.4.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}基线方法}{8}{subsection.4.3}\protected@file@percent }
\newlabel{sec:baselines}{{4.3}{8}{基线方法}{subsection.4.3}{}}
\citation{pedregosa2011scikit}
\citation{sejfia2022practical}
\citation{ladisa2023feasibility}
\citation{ossgadget}
\citation{duan2020towards}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}评估指标}{9}{subsection.4.4}\protected@file@percent }
\newlabel{sec:metrics}{{4.4}{9}{评估指标}{subsection.4.4}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5}实验结果与分析}{9}{section.5}\protected@file@percent }
\newlabel{sec:results}{{5}{9}{实验结果与分析}{section.5}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}RQ1: 模型性能评估}{9}{subsection.5.1}\protected@file@percent }
\newlabel{sec:rq1}{{5.1}{9}{RQ1: 模型性能评估}{subsection.5.1}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.1.1}分类器选择}{9}{subsubsection.5.1.1}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces 不同分类器在我们框架下的性能对比}}{9}{table.caption.16}\protected@file@percent }
\newlabel{tab:classifier_comparison}{{2}{9}{不同分类器在我们框架下的性能对比}{table.caption.16}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.1.2}与基线方法的对比}{9}{subsubsection.5.1.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces 我们的方法与基线方法的性能对比}}{9}{table.caption.17}\protected@file@percent }
\newlabel{tab:baseline_comparison}{{3}{9}{我们的方法与基线方法的性能对比}{table.caption.17}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.1.3}控制变量实验：泛化能力验证}{9}{subsubsection.5.1.3}\protected@file@percent }
\newlabel{sec:controlled_experiment}{{5.1.3}{9}{控制变量实验：泛化能力验证}{subsubsection.5.1.3}{}}
\citation{zheng2024towards}
\citation{duan2020towards}
\citation{zheng2024towards}
\citation{maaten2008visualizing}
\@writefile{toc}{\contentsline {paragraph}{实验设计}{10}{section*.18}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{实验结果与分析}{10}{section*.19}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces PDCG-LLM-Detect在外部数据集上的泛化性能}}{10}{table.caption.20}\protected@file@percent }
\newlabel{tab:generalization_results}{{4}{10}{PDCG-LLM-Detect在外部数据集上的泛化性能}{table.caption.20}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Top 20 特征重要性排名。蓝色条形表示图结构特征，红色条形表示规则特征。可以清楚地看到，前三名特征均为图特征，验证了PDCG结构分析的重要价值。}}{10}{figure.caption.21}\protected@file@percent }
\newlabel{fig:feature_importance}{{7}{10}{Top 20 特征重要性排名。蓝色条形表示图结构特征，红色条形表示规则特征。可以清楚地看到，前三名特征均为图特征，验证了PDCG结构分析的重要价值。}{figure.caption.21}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}RQ2: 协同机制贡献度分析}{10}{subsection.5.2}\protected@file@percent }
\newlabel{sec:rq2}{{5.2}{10}{RQ2: 协同机制贡献度分析}{subsection.5.2}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.1}特征重要性分析}{10}{subsubsection.5.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.2}消融实验}{10}{subsubsection.5.2.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces 特征消融实验结果 (Random Forest)}}{10}{table.caption.22}\protected@file@percent }
\newlabel{tab:ablation_study}{{5}{10}{特征消融实验结果 (Random Forest)}{table.caption.22}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.3}特征空间可视化分析}{10}{subsubsection.5.2.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}RQ3: LLM作为知识引擎的有效性评估}{11}{subsection.5.3}\protected@file@percent }
\newlabel{sec:rq3}{{5.3}{11}{RQ3: LLM作为知识引擎的有效性评估}{subsection.5.3}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.1}知识的来源与提炼}{11}{subsubsection.5.3.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.2}知识的有效性：从简单样本到复杂场景的泛化能力}{11}{subsubsection.5.3.2}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{证据一：极高的恶意行为覆盖率}{11}{section*.24}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{证据二：全面的知识类别覆盖}{11}{section*.26}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{证据三：稳定的跨数据集表现}{11}{section*.28}\protected@file@percent }
\newlabel{fig:tsne_rules}{{8a}{12}{仅规则特征}{figure.caption.23}{}}
\newlabel{sub@fig:tsne_rules}{{a}{12}{仅规则特征}{figure.caption.23}{}}
\newlabel{fig:tsne_graph}{{8b}{12}{仅图特征}{figure.caption.23}{}}
\newlabel{sub@fig:tsne_graph}{{b}{12}{仅图特征}{figure.caption.23}{}}
\newlabel{fig:tsne_all}{{8c}{12}{全部特征}{figure.caption.23}{}}
\newlabel{sub@fig:tsne_all}{{c}{12}{全部特征}{figure.caption.23}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces 不同特征组合的t-SNE降维可视化。红色点代表恶意包，蓝色点代表良性包。}}{12}{figure.caption.23}\protected@file@percent }
\newlabel{fig:tsne_visualization}{{8}{12}{不同特征组合的t-SNE降维可视化。红色点代表恶意包，蓝色点代表良性包。}{figure.caption.23}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces 恶意包的规则命中数分布图 (Y轴为对数尺度)。横轴表示一个恶意包命中了多少条规则，纵轴表示对应数量的恶意包个数。}}{12}{figure.caption.25}\protected@file@percent }
\newlabel{fig:rule_coverage}{{9}{12}{恶意包的规则命中数分布图 (Y轴为对数尺度)。横轴表示一个恶意包命中了多少条规则，纵轴表示对应数量的恶意包个数。}{figure.caption.25}{}}
\@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces LLM生成的87条规则类别分布统计}}{12}{table.caption.27}\protected@file@percent }
\newlabel{tab:rule_categories}{{6}{12}{LLM生成的87条规则类别分布统计}{table.caption.27}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.3.3}结论：LLM作为知识引擎的价值}{12}{subsubsection.5.3.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {6}讨论}{12}{section.6}\protected@file@percent }
\newlabel{sec:discussion}{{6}{12}{讨论}{section.6}{}}
\bibcite{sonatype}{{1}{}{{}}{{}}}
\bibcite{he2020software}{{2}{}{{}}{{}}}
\bibcite{ji2022opensource}{{3}{}{{}}{{}}}
\bibcite{martinez2021software}{{4}{}{{}}{{}}}
\bibcite{apache2022log4j}{{5}{}{{}}{{}}}
\bibcite{zimmermann2019small}{{6}{}{{}}{{}}}
\bibcite{liu2018software}{{7}{}{{}}{{}}}
\bibcite{zahan2022weak}{{8}{}{{}}{{}}}
\bibcite{liu2022demystifying}{{9}{}{{}}{{}}}
\bibcite{ladisa2023sok}{{10}{}{{}}{{}}}
\bibcite{su2024learning}{{11}{}{{}}{{}}}
\@writefile{toc}{\contentsline {section}{\numberline {7}结论}{13}{section.7}\protected@file@percent }
\newlabel{sec:conclusion}{{7}{13}{结论}{section.7}{}}
\bibcite{halder2024malicious}{{12}{}{{}}{{}}}
\bibcite{samaana2025machine}{{13}{}{{}}{{}}}
\bibcite{ladisa2023feasibility}{{14}{}{{}}{{}}}
\bibcite{sun20241+}{{15}{}{{}}{{}}}
\bibcite{sejfia2022practical}{{16}{}{{}}{{}}}
\bibcite{huang2024donapi}{{17}{}{{}}{{}}}
\bibcite{zhang2023malicious}{{18}{}{{}}{{}}}
\bibcite{yu2024maltracker}{{19}{}{{}}{{}}}
\bibcite{huang2024spiderscan}{{20}{}{{}}{{}}}
\bibcite{liang2023needle}{{21}{}{{}}{{}}}
\bibcite{ohm2022towards}{{22}{}{{}}{{}}}
\bibcite{zheng2024towards}{{23}{}{{}}{{}}}
\bibcite{iqbal2025pypiguard}{{24}{}{{}}{{}}}
\bibcite{zahan2024leveraging}{{25}{}{{}}{{}}}
\bibcite{zeshan2024they}{{26}{}{{}}{{}}}
\bibcite{xue2024poster}{{27}{}{{}}{{}}}
\bibcite{wang2025malpacdetector}{{28}{}{{}}{{}}}
\bibcite{zhang2025automatically}{{29}{}{{}}{{}}}
\bibcite{yamaguchi2014modeling}{{30}{}{{}}{{}}}
\bibcite{freeman1978centrality}{{31}{}{{}}{{}}}
\bibcite{brandes2001faster}{{32}{}{{}}{{}}}
\bibcite{page1999pagerank}{{33}{}{{}}{{}}}
\bibcite{blondel2008fast}{{34}{}{{}}{{}}}
\bibcite{duan2020towards}{{35}{}{{}}{{}}}
\bibcite{ossgadget}{{36}{}{{}}{{}}}
\bibcite{datadog2023dataset}{{37}{}{{}}{{}}}
\bibcite{pedregosa2011scikit}{{38}{}{{}}{{}}}
\bibcite{maaten2008visualizing}{{39}{}{{}}{{}}}
\providecommand\NAT@force@numbers{}\NAT@force@numbers
\gdef \@abspage@last{15}
